import csv
import sys
import re
import datetime as dt

# Matches 'Transport,<digits>,<ip>]' where <ip> is one of three specific
# addresses; main() only keeps log lines that contain this tag.
# Note the digit run may be empty here ([0-9]*), unlike in r3.
r1 = re.compile(r'Transport,[0-9]*,(20\.127\.164\.233|74\.235\.194\.78|74\.235\.128\.47)]')
# Keywords a line must contain (in addition to matching r1) to be kept by main().
r2 = re.compile(r'CMD: |TOKEN|EXEC|logging|seconds')
# Matches 'Transport,<digits>'; main() takes the digits as the session number.
r3 = re.compile(r'Transport,[0-9]+')
# Directories, relative to the working directory, that main() processes in
# turn: each is read for a cowrie log file and receives a '<date>_data.csv'.
honeydirs = ['cowrie_basic_logs', 'cowrie_llm_logs', 'cowrie_base_llm_logs']


class LogData:
    """Mutable container for the statistics gathered about one session."""

    def __init__(self):
        # Running total of the integers read from 'TOKEN USE:' lines.
        self.tokens_used = 0
        # Running total of the values read from 'LLM CMD EXEC TIME:' lines.
        self.llm_resp_time = 0.0
        # Number of 'CMD:' lines seen for the session.
        self.num_inputs = 0
        # Value read from the most recent 'Connection lost after' line.
        self.session_time = 0.0
        # Command text of every 'CMD:' line, in the order encountered.
        self.inputs = []

    def __repr__(self):
        """Return one 'name: value' line per attribute, each newline-terminated."""
        template = (
            'tokens_used: {0.tokens_used}\n'
            'llm_resp_time: {0.llm_resp_time}\n'
            'num_inputs: {0.num_inputs}\n'
            'session_time: {0.session_time}\n'
            'inputs: {0.inputs}\n'
        )
        return template.format(self)


def llm_log(filtered_log, log_data):
    """Accumulate LLM response time and token usage for every session.

    Args:
        filtered_log: dict mapping a session id to the list of log lines
            collected for that session.
        log_data: dict mapping a session id to its ``LogData`` record.  It is
            updated in place; a record is created for any session that does
            not have one yet.

    Lines containing ``'LLM CMD EXEC TIME:'`` add a float to
    ``llm_resp_time``; lines containing ``'TOKEN USE:'`` add an int to
    ``tokens_used``.  Both keywords are looked up anywhere in the line, so a
    line may contain them without carrying a number (for example inside
    logged command text).  Such lines are skipped instead of aborting the
    whole run with ``ValueError``.

    The raw token examined on each matching line is echoed to stdout.
    """
    for session, lines in filtered_log.items():
        if session not in log_data:  # init class for storing data for each session
            log_data[session] = LogData()
        record = log_data[session]

        for line in lines:
            fields = line.split(' ')

            if 'LLM CMD EXEC TIME:' in line:
                # The value is tried as the second-to-last field first
                # (presumably a unit word such as 'seconds' follows it),
                # then as the last field.
                elapsed = None
                for candidate in (fields[-2], fields[-1]):
                    try:
                        elapsed = float(candidate)
                    except ValueError:
                        continue
                    break
                if elapsed is not None:
                    record.llm_resp_time += elapsed
                print(fields[-2])
            elif 'TOKEN USE:' in line:
                print(fields[-1])
                try:
                    record.tokens_used += int(fields[-1])
                except ValueError:
                    # Keyword present but no trailing integer: ignore the line.
                    pass


def cowrie_log(filtered_log, log_data):
    """Record the commands and the session duration of every session.

    Args:
        filtered_log: dict mapping a session id to the list of log lines
            collected for that session.
        log_data: dict mapping a session id to its ``LogData`` record.  It is
            updated in place; a record is created for any session that does
            not have one yet.

    A line counts as a command only when ``'CMD:'`` occurs as a
    space-delimited token of its own.  Everything after the first such token
    is stored verbatim in ``inputs`` (including a trailing newline if the
    line has one) and ``num_inputs`` is incremented.  A line that merely
    contains ``'CMD:'`` inside a longer token is ignored; previously it
    raised ``ValueError`` after the counter had already been incremented.

    A line containing ``'Connection lost after'`` sets ``session_time`` from
    its second-to-last field.  If that field is not a number the line is
    skipped rather than aborting the run.
    """
    for session, lines in filtered_log.items():
        if session not in log_data:  # init class for storing data for each session
            log_data[session] = LogData()
        record = log_data[session]

        for line in lines:
            fields = line.split(' ')

            # Token test (not substring test) so that index() cannot fail.
            if 'CMD:' in fields:
                record.num_inputs += 1
                record.inputs.append(' '.join(fields[fields.index('CMD:') + 1:]))

            # Deliberately not 'elif': a line is checked for both markers.
            if 'Connection lost after' in line:
                try:
                    record.session_time = float(fields[-2])
                except ValueError:
                    # Phrase present without a numeric duration: keep old value.
                    pass


def main():
    """Turn each honeypot directory's log into a per-session CSV summary.

    Usage: ``python3 extract.py [<date>]``

    Without an argument the file ``cowrie.log`` is read and today's date
    (``YYYY-MM-DD``) names the output.  With an argument, ``cowrie.log.<date>``
    is read and ``<date>`` names the output.

    For every directory in ``honeydirs`` the log is filtered to lines that
    match both ``r1`` and ``r2``, grouped by session id, and analysed with
    ``cowrie_log`` (and ``llm_log`` when the directory name contains
    ``'llm'``).  Sessions without any recorded input are dropped and the rest
    is written to ``<dir>/<date>_data.csv``.

    Raises:
        OSError: if a log file cannot be opened or a CSV cannot be written;
            no directory after the failing one is processed.
    """
    # sys.argv always holds at least the script name when run as a script, so
    # the only distinction needed is whether a date argument was supplied.
    if len(sys.argv) < 2:
        d1 = dt.datetime.now().strftime("%Y-%m-%d")
        logfile = "cowrie.log"
    else:
        d1 = sys.argv[1]
        logfile = "cowrie.log.{}".format(d1)

    for honeydir in honeydirs:
        log_data = {}
        filtered_log = {}

        with open("{}/{}".format(honeydir, logfile), 'r') as f:
            for log in f:
                if not (r1.search(log) and r2.search(log)):
                    continue

                # Use only the FIRST 'Transport,<n>' occurrence. Joining all
                # occurrences produced a corrupted id whenever the pattern
                # appeared again later in the line (e.g. inside command text).
                first_tag = r3.search(log)
                if first_tag is None:
                    # r1 allows an empty digit run, r3 does not: no usable id.
                    continue
                session_id = first_tag.group(0).split(',')[1]

                # NOTE(review): 'SSH' is looked up in the whole line, so any
                # line whose text contains 'SSH' is tagged as SSH -- confirm
                # this is acceptable for non-SSH sessions.
                prefix = 'SSH_' if 'SSH' in log else 'TEL_'
                filtered_log.setdefault(prefix + session_id, []).append(log)

        if 'llm' in honeydir:
            llm_log(filtered_log, log_data)
        cowrie_log(filtered_log, log_data)

        # Sessions in which no command was recorded are not reported.
        log_data = {sid: rec for sid, rec in log_data.items() if rec.num_inputs != 0}

        with open('{}/{}_data.csv'.format(honeydir, d1), 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['session_id', 'tokens_used', 'llm_resp_time', 'num_inputs', 'session_time', 'inputs'])
            for session_id, rec in log_data.items():
                # 'inputs' is a list; csv.writer stores its str() form.
                writer.writerow([session_id, rec.tokens_used, rec.llm_resp_time,
                                 rec.num_inputs, rec.session_time, rec.inputs])


if __name__ == "__main__":
    main()
